require(nlme)
require(reshape2)
require(ggplot2)
require(multcomp)
require(emmeans)
require(rstatix)
require(ggpubr)
require(RColorBrewer)
require(tidyverse)
require(dplyr)
require(ggbeeswarm)
require(readxl)
require(broom)
require(scales)
require(RCurl)
require(plotly)
require(ggrepel)
require(gridExtra)
require(ggExtra)
require(DT)
source("./HelperFunctions.R")
theme_point<-theme_bw()+theme(strip.background = element_blank())
theme_bar<-theme_bw()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank())
theme_boxplot<-theme_classic()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.title.x = element_blank(),axis.line.x = element_blank(),legend.position = "none")
color_panel<-c("#e35d6a","#5bb75b","#428bca","#e87810","#23496b","#ffbf00","#cc2028","#039748","pink","darkgray")
names(color_panel)<-sort(c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A"))
color_panel1 <- c("#039748","#039748","#ffbf00","#ffbf00","#e35d6a","#e35d6a","#5bb75b","#5bb75b","#428bca","#428bca","#23496b","#23496b","#cc2028","#cc2028","#e87810")
color_panel2 <- brewer.pal(10, name = "Paired")
names(color_panel2) <- c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A")
full_nr <- format_format(big.mark = ",", decimal.mark = ".", scientific = FALSE)
color_panel_spikes <- c("#e35d6a","gray","#5bb75b","#428bca")
names(color_panel_spikes) <- c("LP1","MIMAT","RC1","RC2")
color_panel_spikes2 <- c("#23496b","#ffbf00","gray","#ffbf01","orange","#23496b")
names(color_panel_spikes2) <- c("LP","RC","MIMAT","RC1","RC2","LP1")
set.seed(1)#Annotation
sample_annotation<-read_tsv("./Annotation_exRNAQC017.csv")
colnames(sample_annotation)[23]<-'TimeInterval'
sample_annotation<-sample_annotation %>% mutate(donorID= case_when((Donor == "PNL-XNID") ~ "D5",
(Donor == "PNL-7DEN") ~ "D1",
(Donor == "PNL-8ZI1") ~ "D2",
(Donor == "PNL-NLID") ~ "D3",
(Donor == "PNL-UCH7") ~ "D4"),
SampleID= paste0(GraphKit,"_",GraphTube,"_",TimeInterval,"_",donorID),
tube_kitID=paste0(GraphTube,"_",GraphKit),
tube_kit_timeID=paste0(GraphTube,"_",GraphKit,"_",TimeInterval),
PlasmaInputVml= PlasmaInputV/1000)
sample_annotation$Tube<-as.factor(sample_annotation$Tube)
sample_annotation$RNAisolation<-as.factor(sample_annotation$RNAisolation)
sample_annotation$TimeInterval<-as.factor(sample_annotation$TimeInterval)
sample_annotation$donorID<-as.factor(sample_annotation$donorID)
#Reads
miRNAs <- data.table::fread("mergedTxts/allmiRs_subs.txt", header=T, data.table = F)
spikes <- data.table::fread("mergedTxts/allspikes_subs.txt", header=T, data.table = F)
reads <- data.table::fread("mergedTxts/allreads_subs.txt", header=T, data.table = F)
contam <- data.table::fread("mergedTxts/allcontam_subs.txt", header=T, data.table = F)
reads_total<- read_tsv("./smallRNA_table.tsv")
reads_total <- full_join(reads_total, sample_annotation, by = "RNAID")
Tube.levels<-levels(reads_total$Tube)
RNAisolation.levels<-levels(reads_total$RNAisolation)
TimeInterval.levels<-levels(reads_total$TimeInterval)
#ggplot(reads_total, aes(x = TimeInterval, y = total_reads, group = donorID:RNAisolation)) +
# geom_line(aes(color=RNAisolation))+geom_point() + facet_grid(. ~ Tube) + labs(title = "Raw Number of Reads")
spikes_sum <- spikes %>% gather(key="UniqueID",value="spikecounts",-"spikeID") %>%
group_by(UniqueID) %>% dplyr::summarise(counts=sum(spikecounts, na.rm=T)) %>%
#spread(key=UniqueID,value=spikesum) %>%
mutate(reads="spikes")
# add sum of spikes to the total mapped reads (these were not considered as "mapped" yet)
reads_complete <- reads %>% gather(key="UniqueID",value="counts",-"reads") %>%
rbind(., spikes_sum) %>%
spread(key=reads, value=counts) %>%
mutate(all_mapped = mapped + spikes) %>%
gather(key="reads",value="counts",-"UniqueID")
DT::datatable(sample_annotation %>% dplyr::select(c("RNAID","RNAisolation","PlasmaInputV","donorID","Tube","TimeInterval","Sequinconc","ERCCconc",Abbrevation="GraphKit")), rownames = TRUE, filter="top", options = list(pageLength = 10, scrollX=T), caption = "Sample annotation table")calculation: count mapped miRNAS / count mapped LP spikes
gene_level_ratios <- rbind(
reads %>%
filter(reads=="mapped_miR") %>%
mutate(type="endogenous") %>%
group_by(type) %>%
dplyr::summarise_if(is.numeric, sum, na.rm=TRUE),spikes %>%
mutate(type=gsub(".-..$","",spikeID)) %>%
group_by(type) %>%
dplyr::summarise_if(is.numeric, sum, na.rm=TRUE)
) %>%
gather(., key="UniqueID",value="counts",-type) %>% #long format
spread(., key = "type", value="counts") %>%
mutate("LPvsEndo"=LP/endogenous, "RCvsEndo"=RC/endogenous, "EndovsRC"=endogenous/RC, "EndovsLP"= endogenous/LP) %>%
left_join(., sample_annotation %>%
dplyr::select(c("UniqueID","RNAID","RNAisolation","Tube","SampleID","GraphTube","EluateV","PlasmaInputV", "BiologicalReplicate", "TimeInterval","Donor", "donorID")), by=c("UniqueID" = "RNAID")) #add annotationgene_level_ratios <- gene_level_ratios %>%
mutate("EndovsLP_InputCorr"= (EndovsLP)*EluateV/PlasmaInputV)
Tube.levels<-levels(gene_level_ratios$Tube)
RNAisolation.levels<-levels(gene_level_ratios$RNAisolation)
TimeInterval.levels<-levels(gene_level_ratios$TimeInterval)Linear mixed-effects model with crossed fixed effects of tube, kit and TimeInterval.
EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
weights = varIdent(form = ~ 1 | RNAisolation),
data=gene_level_ratios)
anova(EndovsLP_m1)EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_m1)Only Tube, and the interactions Tube:RNAisolation and Tube:TimeInterval are significant. There are no significant three-way interactions.
Next, we look at second-order interactions:
EndovsLP_m2<-lme(EndovsLP~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_m2)EndovsLP_m3<-lme(EndovsLP~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_m3)checking normality of residuals
qqnorm(EndovsLP_m3$residuals)
qqline(EndovsLP_m3$residuals)EndovsLP_m.time<-emmeans(EndovsLP_m3, ~TimeInterval|Tube)
plot(EndovsLP_m.time, comparisons=TRUE)pairs(EndovsLP_m.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -6.34 33.1 74 -0.191 0.9800
## T0 - T16 -11.16 33.1 74 -0.337 0.9395
## T04 - T16 -4.82 33.1 74 -0.145 0.9884
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 19.18 33.1 74 0.579 0.8318
## T0 - T16 -113.14 33.1 74 -3.414 0.0030
## T04 - T16 -132.33 33.1 74 -3.993 0.0004
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 24.20 33.1 74 0.730 0.7464
## T0 - T16 58.03 33.1 74 1.751 0.1933
## T04 - T16 33.83 33.1 74 1.021 0.5660
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Now, compare Kits
EndovsLP_m.method<-emmeans(EndovsLP_m3, ~Tube|RNAisolation)
plot(EndovsLP_m.method, comparisons=TRUE)pairs(EndovsLP_m.method)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -85.1 27.1 74 -3.144 0.0067
## Citrate - serum -14.2 27.1 74 -0.526 0.8587
## EDTA - serum 70.8 27.1 74 2.617 0.0286
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -94.0 27.1 74 -3.473 0.0025
## Citrate - serum -109.0 27.1 74 -4.027 0.0004
## EDTA - serum -15.0 27.1 74 -0.554 0.8447
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
calculation: count mapped miRNAS / count mapped RC spikes
gene_level_ratios$GraphTube2<- paste(gene_level_ratios$Tube, gene_level_ratios$TimeInterval, sep = "_")
test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsRC", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes2 <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) +
#geom_bar(position=position_dodge(), stat="identity")+
geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
geom_point(aes(colour=Tube), size=1.2) +
geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
labs(x="", y="relative small RNA conc. (rescaled)", title=NULL, col = NULL, fill = NULL) +
scale_colour_manual(values=color_panel) +
scale_y_log10() +
theme_bar
ggplotly(spikes2)EndovsRC_m1<-lme(EndovsRC~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsRC_m1)anova_EndovsRC_m1 <- round(anova(EndovsRC_m1)[c(5, 6, 7), c(4)], digits = 3)EndovsRC_m2<-lme(EndovsRC~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsRC_m2)EndovsRC_m3<-lme(EndovsRC~Tube+RNAisolation+TimeInterval+Tube:TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsRC_m3)EndovsRC.time<-emmeans(EndovsRC_m3, ~TimeInterval|Tube)
plot(EndovsRC.time, comparisons=TRUE)pairs(EndovsRC.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -17.647 18.1 76 -0.976 0.5945
## T0 - T16 -18.139 18.1 76 -1.003 0.5774
## T04 - T16 -0.493 18.1 76 -0.027 0.9996
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 13.113 18.1 76 0.725 0.7495
## T0 - T16 -51.912 18.1 76 -2.870 0.0145
## T04 - T16 -65.025 18.1 76 -3.595 0.0016
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 16.067 18.1 76 0.888 0.6493
## T0 - T16 18.463 18.1 76 1.021 0.5662
## T04 - T16 2.396 18.1 76 0.132 0.9904
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
EDTA has a significantly higher smallRNA concentration in plasma at time T16.
EndovsRC.time<-emmeans(EndovsRC_m3, ~Tube|TimeInterval)
plot(EndovsRC.time, comparisons=TRUE)pairs(EndovsRC.time)## TimeInterval = T0:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -64.83 18.1 76 -3.584 0.0017
## Citrate - serum -29.56 18.1 76 -1.634 0.2377
## EDTA - serum 35.28 18.1 76 1.950 0.1318
##
## TimeInterval = T04:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -34.07 18.1 76 -1.884 0.1504
## Citrate - serum 4.16 18.1 76 0.230 0.9713
## EDTA - serum 38.23 18.1 76 2.114 0.0938
##
## TimeInterval = T16:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -98.61 18.1 76 -5.451 <.0001
## Citrate - serum 7.04 18.1 76 0.389 0.9199
## EDTA - serum 105.65 18.1 76 5.841 <.0001
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
gene_pwc <- gene_level_ratios %>% group_by(Tube) %>% pairwise_t_test(EndovsRC ~ TimeInterval) %>% add_xy_position(x = "TimeInterval")
# Show p-values
# ggplot(gene_level_ratios, aes(x = TimeInterval, y = EndovsRC, color = TimeInterval)) +
# geom_boxplot()+
# geom_point()+
# scale_colour_manual(values=color_panel)+
# facet_wrap(~ Tube) +
# stat_pvalue_manual(gene_pwc, label = "p.signif") +
# labs(title = "Time lapse comparison within tubes (T test)", y = "miRNA vs RC concentration")Calculation: Endo/LP * EluateV/PlasmaInputV
test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsLP_InputCorr", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes_conc <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) +
#geom_bar(position=position_dodge(), stat="identity")+
facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
geom_point(aes(colour=Tube), size=1.2) +
geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
labs(x="", y="relative endogenous small RNA concentration", title=NULL, subtitle="endogenous small RNA vs LP (rescaled)", col = NULL) +
scale_colour_manual(values=color_panel) +
scale_y_log10() +
theme_bar
ggplotly(spikes_conc)EndovsLP_InputCorr_m1<-lme(EndovsLP_InputCorr~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_InputCorr_m1)anova_EndovsLP_InputCorr_m1 <- round(anova(EndovsLP_InputCorr_m1)[c(5, 6, 7), c(4)], digits = 3)EndovsLP_InputCorr_m2<-lme(EndovsLP_InputCorr~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_InputCorr_m2)EndovsLP_InputCorr_m3<-lme(EndovsLP_InputCorr~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random= ~1|donorID,
data=gene_level_ratios)
anova(EndovsLP_InputCorr_m3)qqnorm(EndovsLP_InputCorr_m3$residuals)
qqline(EndovsLP_InputCorr_m3$residuals)EndovsLP_InputCorr_m.time<-emmeans(EndovsLP_InputCorr_m3, ~TimeInterval|Tube)
plot(EndovsLP_InputCorr_m.time, comparisons=TRUE)pairs(EndovsLP_InputCorr_m.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -0.656 1.7 74 -0.386 0.9212
## T0 - T16 -1.040 1.7 74 -0.612 0.8138
## T04 - T16 -0.384 1.7 74 -0.226 0.9722
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.602 1.7 74 0.943 0.6149
## T0 - T16 -6.004 1.7 74 -3.535 0.0020
## T04 - T16 -7.606 1.7 74 -4.478 0.0001
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.407 1.7 74 0.828 0.6866
## T0 - T16 2.703 1.7 74 1.591 0.2559
## T04 - T16 1.295 1.7 74 0.763 0.7270
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
EndovsLP_InputCorr_m.method<-emmeans(EndovsLP_InputCorr_m3, ~Tube|RNAisolation)
plot(EndovsLP_InputCorr_m.method, comparisons=TRUE)pairs(EndovsLP_InputCorr_m.method)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -8.51 1.39 74 -6.133 <.0001
## Citrate - serum -1.42 1.39 74 -1.027 0.5624
## EDTA - serum 7.08 1.39 74 5.106 <.0001
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -3.13 1.39 74 -2.258 0.0682
## Citrate - serum -3.63 1.39 74 -2.619 0.0285
## EDTA - serum -0.50 1.39 74 -0.360 0.9310
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
number of miRNA with counts > 6
cutoff_kit <- data.frame(Tube = sample_annotation$Tube, GraphTube = sample_annotation$GraphTube, median_th = 3)miRNAs_long <- miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate)), by=c("UniqueID" = "RNAID"))
#keep only miRNA coding genes with more than 0 counts
miRNAs_long <- miRNAs_long %>% filter(est_counts > 0)
number_miR_detected <- miRNAs_long %>% group_by(SampleID) %>% dplyr::summarize(miR_above0=n())
number_miR_detected <- full_join(number_miR_detected,
miRNAs_long %>% group_by(SampleID) %>%
dplyr::summarize(total_est_counts_above0=sum(est_counts)),
by="SampleID")
#cutoff_kit <- single_pos %>% group_by(GraphTube) %>% dplyr::summarise(median_th = median(filter_threshold))
miRNAs_cutoff <- miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate, TimeInterval)), by=c("UniqueID" = "RNAID"))
#left_join(., cutoff_kit, by="GraphTube") #add the median cut-off for each kit
miRNAs_cutoff <- miRNAs_cutoff %>%
filter(est_counts > 6)
number_miR_detected <- full_join(number_miR_detected,
miRNAs_cutoff %>% group_by(SampleID) %>%
dplyr::summarize(miR_aboveTh=n()),
by="SampleID")
number_miR_detected <- full_join(number_miR_detected,
miRNAs_cutoff %>% group_by(SampleID) %>%
dplyr::summarize(total_est_counts_aboveTh=sum(est_counts)),
by="SampleID")
number_miR_detected <- left_join(number_miR_detected,
dplyr::select(sample_annotation, c(UniqueID,RNAID, GraphTube, GraphKit, RNAisolation, EluateV,SampleID, Tube, BiologicalReplicate, TimeInterval, donorID)),
by="SampleID")
#convert to the original format
miRNAs_cutoff <- dplyr::select(miRNAs_cutoff, MIMATID, UniqueID, est_counts, Tube, BiologicalReplicate, TimeInterval) %>%
spread(., key=UniqueID, value=est_counts)
#write.csv( miRNAs_cutoff, file="../../../exRNAQC/ miRNAs_cutoff.csv",row.names=FALSE, na="")count_m1<-lme(miR_aboveTh~Tube*RNAisolation*TimeInterval,
random=~1|donorID,
data=number_miR_detected)
anova(count_m1)anova_count_m1 <- round(anova(count_m1)[c(5, 6, 7), c(4)], digits = 3)count_m2<-lme(miR_aboveTh~(Tube+RNAisolation+TimeInterval)^2,
random=~1|donorID,
data=number_miR_detected)
anova(count_m2)count_m3<-lme(miR_aboveTh~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random=~1|donorID,
data=number_miR_detected)
anova(count_m3)qqnorm(count_m3$residuals)
qqline(count_m3$residuals)count_m3.time<-emmeans(count_m3, ~TimeInterval|Tube)
plot(count_m3.time, comparisons=TRUE)pairs(count_m3.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 15.2 5.24 74 2.901 0.0134
## T0 - T16 26.2 5.24 74 5.000 <.0001
## T04 - T16 11.0 5.24 74 2.099 0.0969
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 -0.5 5.24 74 -0.095 0.9950
## T0 - T16 -12.5 5.24 74 -2.385 0.0508
## T04 - T16 -12.0 5.24 74 -2.290 0.0635
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.6 5.24 74 0.305 0.9499
## T0 - T16 5.6 5.24 74 1.069 0.5364
## T04 - T16 4.0 5.24 74 0.763 0.7265
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
count_m3.time<-emmeans(count_m3, ~Tube|TimeInterval)
plot(count_m3.time, comparisons=TRUE)pairs(count_m3.time)## TimeInterval = T0:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 18.0 5.24 74 3.435 0.0028
## Citrate - serum -10.4 5.24 74 -1.985 0.1231
## EDTA - serum -28.4 5.24 74 -5.420 <.0001
##
## TimeInterval = T04:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 2.3 5.24 74 0.439 0.8994
## Citrate - serum -24.0 5.24 74 -4.580 0.0001
## EDTA - serum -26.3 5.24 74 -5.019 <.0001
##
## TimeInterval = T16:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -20.7 5.24 74 -3.950 0.0005
## Citrate - serum -31.0 5.24 74 -5.916 <.0001
## EDTA - serum -10.3 5.24 74 -1.966 0.1280
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
#T test
# Pairwise comparisons
count_pwc <- number_miR_detected %>% group_by(TimeInterval) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
geom_boxplot() +
geom_point() +
facet_wrap(~ as.factor(TimeInterval)) +
stat_pvalue_manual(count_pwc, label = "p.adj") +
scale_color_manual(values=color_panel)+
scale_fill_manual(values=color_panel)+
theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
labs(y = "sensitivity")
count_plotcount_m3.method<-emmeans(count_m3, ~Tube|RNAisolation)
plot(count_m3.method, comparisons=TRUE)pairs(count_m3.method)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 24.2 4.28 74 5.656 <.0001
## Citrate - serum -3.8 4.28 74 -0.888 0.6495
## EDTA - serum -28.0 4.28 74 -6.544 <.0001
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -24.5 4.28 74 -5.718 <.0001
## Citrate - serum -39.8 4.28 74 -9.302 <.0001
## EDTA - serum -15.3 4.28 74 -3.584 0.0017
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
count_pwc <- number_miR_detected %>% group_by(GraphKit) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
geom_boxplot() +
geom_point() +
facet_wrap(~ as.factor(GraphKit)) +
stat_pvalue_manual(count_pwc, label = "p.adj", y.position = c(215, 225, 235)) +
scale_color_manual(values=color_panel)+
scale_fill_manual(values=color_panel)+
theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
labs(y = "sensitivity")
count_plotggsave("./smallSensitivity.pdf", plot = count_plot, dpi = 300)## Saving 7 x 5 in image
The ALC values are summarized based on the average of the ALC values (after removing NAs). The dot plot at the end gives an overview of all these values.
Score: lower ALC = better
ALC_m1<-lme(ALC_calc~Tube*RNAisolation*TimePoint,
random=~1|BiologicalReplicate,
data=ALC)
anova(ALC_m1)anova_ALC_m1 <- round(anova(ALC_m1)[c(5, 6, 7), c(4)], digits = 3)There is a significant interaction between Tube:RNAIsolatiom
Now, second-order interactions
ALC_m2<-lme(ALC_calc~(Tube+RNAisolation+TimePoint)^2,
random=~1|BiologicalReplicate,
data=ALC)
anova(ALC_m2)There interaction between Tube:RNAIsolatiom remains significant.
Now, the model keeping only the significant interaction
ALC_m3<-lme(ALC_calc~Tube+RNAisolation+TimePoint+Tube:RNAisolation,
random=~1|BiologicalReplicate,
data=ALC)
anova(ALC_m3)qqnorm(ALC_m3$residuals)
qqline(ALC_m3$residuals)ALC_m3.methods<-emmeans(ALC_m3, ~Tube|RNAisolation)
plot(ALC_m3.methods, comparisons=TRUE)pairs(ALC_m3.methods)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 0.00600 0.0109 49 0.548 0.8480
## Citrate - serum 0.02292 0.0109 49 2.093 0.1017
## EDTA - serum 0.01692 0.0109 49 1.545 0.2790
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA 0.04688 0.0109 49 4.283 0.0002
## Citrate - serum 0.04910 0.0109 49 4.485 0.0001
## EDTA - serum 0.00222 0.0109 49 0.202 0.9777
##
## Results are averaged over the levels of: TimePoint
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
ALC_pwc <- ALC %>% group_by(RNAisolation) %>% pairwise_t_test(ALC_calc ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
alc_plot<-ggplot(ALC, aes(x = Tube, y = ALC_calc, color = Tube)) +
geom_boxplot()+
geom_point()+
scale_colour_manual(values=color_panel)+
facet_wrap(~ RNAisolation) +
stat_pvalue_manual(ALC_pwc, label = "p.signif") +
theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
labs(title = "Tube comparison within kits (T test)", y = "Area left of the curve")
alc_plotWe are usually interested in miRNAs, and the other biotypes are often seen as contaminants (see above). The fraction was calculated as the number of miRNA reads / total reads (mapped + spikes)
perc_miRs <- miRNAs %>% gather(key="UniqueID",value="counts", -"MIMATID") %>% mutate(type=gsub("[0-9].*$","",MIMATID)) %>%
dplyr::group_by(UniqueID,type) %>%
dplyr::summarise(sum_counts = sum(counts, na.rm=T)) %>% #calculate sum of MIMAT per sample
full_join(reads_complete %>% filter(reads=="all_mapped")) %>%
mutate(perc=sum_counts/counts*100) %>% #calculate perc of miRs and spikes of total mapped reads
left_join(sample_annotation, by=c("UniqueID" = "RNAID"))Biotype_m1<-lme(perc~Tube*RNAisolation*TimeInterval,
random=~1|BiologicalReplicate,
data=perc_miRs)
anova(Biotype_m1)anova_Biotype_m1 <- round(anova(Biotype_m1)[c(5, 6, 7), c(4)], digits = 3)Second order:
Biotype_m2<-lme(perc~(Tube+RNAisolation+TimeInterval)^2,
random=~1|BiologicalReplicate,
data=perc_miRs)
anova(Biotype_m2)Keeping only the significant interactions:
Biotype_m3<-lme(perc~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
random=~1|BiologicalReplicate,
data=perc_miRs)
anova(Biotype_m3)Biotype_m3.methods<-emmeans(Biotype_m3, ~Tube|RNAisolation)
plot(Biotype_m3.methods, comparisons=TRUE)pairs(Biotype_m3.methods)## RNAisolation = Maxwell:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -6.09 2.46 78 -2.474 0.0407
## Citrate - serum 14.81 2.46 78 6.021 <.0001
## EDTA - serum 20.90 2.46 78 8.495 <.0001
##
## RNAisolation = miRNeasySPAkit:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -38.44 2.46 78 -15.628 <.0001
## Citrate - serum -15.22 2.46 78 -6.189 <.0001
## EDTA - serum 23.22 2.46 78 9.439 <.0001
##
## Results are averaged over the levels of: TimeInterval
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
Biotype_pwc <- perc_miRs %>% group_by(RNAisolation) %>% pairwise_t_test(perc ~ Tube) %>% add_xy_position(x = "Tube")
# Show p-values
Biotype_plot<-ggplot(perc_miRs, aes(x = Tube, y = perc, color = Tube)) +
geom_boxplot()+
geom_point()+
scale_colour_manual(values=color_panel)+
facet_wrap(~ RNAisolation) +
stat_pvalue_manual(Biotype_pwc, label = "p.signif") +
theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
labs(title = "Tube comparison within kits (T test)", y = "miRNA counts (%)")
Biotype_plotBiotype_m3.time<-emmeans(Biotype_m3, ~TimeInterval|Tube)
plot(Biotype_m3.time, comparisons=TRUE)pairs(Biotype_m3.time)## Tube = Citrate:
## contrast estimate SE df t.ratio p.value
## T0 - T04 0.590 3.01 78 0.196 0.9791
## T0 - T16 13.105 3.01 78 4.350 0.0001
## T04 - T16 12.515 3.01 78 4.154 0.0002
##
## Tube = EDTA:
## contrast estimate SE df t.ratio p.value
## T0 - T04 1.672 3.01 78 0.555 0.8442
## T0 - T16 2.018 3.01 78 0.670 0.7816
## T04 - T16 0.346 3.01 78 0.115 0.9928
##
## Tube = serum:
## contrast estimate SE df t.ratio p.value
## T0 - T04 4.579 3.01 78 1.520 0.2873
## T0 - T16 7.874 3.01 78 2.614 0.0286
## T04 - T16 3.295 3.01 78 1.094 0.5208
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
Biotype_m3.time<-emmeans(Biotype_m3, ~Tube|TimeInterval)
plot(Biotype_m3.time, comparisons=TRUE)pairs(Biotype_m3.time)## TimeInterval = T0:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -18.929 3.01 78 -6.283 <.0001
## Citrate - serum 0.208 3.01 78 0.069 0.9974
## EDTA - serum 19.136 3.01 78 6.352 <.0001
##
## TimeInterval = T04:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -17.846 3.01 78 -5.924 <.0001
## Citrate - serum 4.197 3.01 78 1.393 0.3495
## EDTA - serum 22.043 3.01 78 7.317 <.0001
##
## TimeInterval = T16:
## contrast estimate SE df t.ratio p.value
## Citrate - EDTA -30.016 3.01 78 -9.963 <.0001
## Citrate - serum -5.023 3.01 78 -1.667 0.2242
## EDTA - serum 24.992 3.01 78 8.296 <.0001
##
## Results are averaged over the levels of: RNAisolation
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 3 estimates
overview <- data.frame(row.names = c("Tube:RNAisolation","Tube:TimeInterval","RNAisolation:TimeInterval"), "RNA concentration" = anova_EndovsRC_m1, "extraction efficiency" = anova_EndovsLP_InputCorr_m1, "sensitivity" = anova_count_m1, "reproducibility" = anova_ALC_m1, "biotype" = anova_Biotype_m1, check.names = FALSE)
overview_t <- t(overview)
#breaks for heatmap
bk1 <- c(seq(0,0.051,by=0.005))
#make color palette
custom_palette = c(colorRampPalette(brewer.pal(n = 7, name =
"RdBu"))(20))
custom_palette2 = c(custom_palette[1:10], 'white')
#make matrix with labels for heatmap
labels_tab <- overview_t
labels_tab[labels_tab<0.001]<-"<0.001"
#make heatmap
phm<-pheatmap::pheatmap(overview_t,
breaks = bk1,
display_numbers = labels_tab,fontsize_number=11, number_color="black",
color = custom_palette2,
angle_col = 45,
cluster_rows=FALSE, cluster_cols=FALSE)ggsave("./smallInteractions2.pdf", plot = phm, dpi = 300)## Saving 7 x 5 in image